In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib notebook

import sys
sys.path.append('..')

from helper import nn
from helper import logistic_regression as lr
import numpy as np

get data and weights


In [2]:
X_raw, y_raw = nn.load_data('ex4data1.mat', transpose=False)
X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)
X.shape


Out[2]:
(5000, 401)

In [3]:
y = nn.expand_y(y_raw)
y.shape


Out[3]:
(5000, 10)
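nn.expand_y turns the 1-D label vector into a one-hot matrix. Its actual implementation isn't shown here; a minimal sketch of the idea (note that in this dataset the digit 0 is stored as label 10, so label k goes to column k-1):

def expand_y_sketch(y):
    # one row per example, a 1 in the column of its (1-based) label
    result = np.zeros((y.size, 10))
    result[np.arange(y.size), y.ravel() - 1] = 1
    return result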

In [4]:
t1, t2 = nn.load_weight('ex4weights.mat')
t1.shape, t2.shape


Out[4]:
((25, 401), (10, 26))

In [5]:
theta = nn.serialize(t1, t2)  # flatten params
theta.shape


Out[5]:
(10285,)
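serialize just flattens the two weight matrices into a single vector so the optimizer can treat all parameters as one 1-D array: 25*401 + 10*26 = 10285. A minimal sketch of serialize/deserialize, assuming the helpers simply concatenate and re-split the raveled matrices:

def serialize_sketch(t1, t2):
    # flatten both weight matrices into one 1-D parameter vector
    return np.concatenate((t1.ravel(), t2.ravel()))

def deserialize_sketch(seq):
    # recover the (25, 401) and (10, 26) weight matrices
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)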

sigmoid gradient


In [6]:
nn.sigmoid_gradient(0)


Out[6]:
0.25
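The sigmoid gradient is g'(z) = g(z) * (1 - g(z)), so g'(0) = 0.5 * 0.5 = 0.25. A sketch of what nn.sigmoid_gradient computes:

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_gradient_sketch(z):
    # derivative of the sigmoid: g'(z) = g(z) * (1 - g(z))
    return sigmoid(z) * (1 - sigmoid(z))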

theta gradient

This is super hard to get right: the dimensions are easy to mix up, so keep track of each array's shape at every step.


In [7]:
d1, d2 = nn.deserialize(nn.gradient(theta, X, y))

In [8]:
d1.shape, d2.shape


Out[8]:
((25, 401), (10, 26))
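To keep the shapes straight, here is a vectorized sketch of the backprop gradient (reusing the sketch helpers above; the real nn.gradient may differ in details), with the shape of every intermediate array written out:

def gradient_sketch(theta, X, y):
    t1, t2 = deserialize_sketch(theta)                   # (25, 401), (10, 26)
    m = X.shape[0]

    # forward pass
    a1 = X                                               # (5000, 401), bias included
    z2 = a1 @ t1.T                                       # (5000, 25)
    a2 = np.insert(sigmoid(z2), 0, 1, axis=1)            # (5000, 26)
    a3 = sigmoid(a2 @ t2.T)                              # (5000, 10)

    # backward pass
    d3 = a3 - y                                          # (5000, 10)
    d2 = d3 @ t2[:, 1:] * sigmoid_gradient_sketch(z2)    # (5000, 25)

    D2 = d3.T @ a2 / m                                   # (10, 26)
    D1 = d2.T @ a1 / m                                   # (25, 401)
    return serialize_sketch(D1, D2)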

gradient checking


In [9]:
# nn.gradient_checking(theta, X, y, epsilon= 0.0001)
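Gradient checking is commented out because it is slow: for each of the 10285 parameters it evaluates the cost twice and compares the central difference (J(theta + eps) - J(theta - eps)) / (2 * eps) against the backprop gradient. A minimal sketch of the idea (the names and signature here are illustrative, not nn's actual API):

def gradient_checking_sketch(theta, X, y, cost_fn, grad_fn, epsilon=1e-4):
    numeric = np.zeros_like(theta)
    for i in range(len(theta)):
        plus, minus = theta.copy(), theta.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        numeric[i] = (cost_fn(plus, X, y) - cost_fn(minus, X, y)) / (2 * epsilon)

    analytic = grad_fn(theta, X, y)
    # a correct backprop implementation gives a relative difference around 1e-9
    return np.linalg.norm(numeric - analytic) / np.linalg.norm(numeric + analytic)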

regularized gradient

Use the normal gradient plus the regularization term.


In [10]:
# nn.gradient_checking(theta, X, y, epsilon=0.0001, regularized=True)
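The regularization term adds (lambda / m) * theta to every weight except the bias column of each layer. A sketch built on the functions above (again, the real helper may differ):

def regularized_gradient_sketch(theta, X, y, l=1):
    m = X.shape[0]
    D1, D2 = deserialize_sketch(gradient_sketch(theta, X, y))
    t1, t2 = deserialize_sketch(theta.copy())

    # zero out the bias columns so they are not regularized
    t1[:, 0] = 0
    t2[:, 0] = 0
    return serialize_sketch(D1 + (l / m) * t1, D2 + (l / m) * t2)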

ready to train the model

remember to randomly initialize the parameters to break symmetry

take a look at the docs for this argument: jac

jac : bool or callable, optional
    Jacobian (gradient) of objective function. Only for CG, BFGS, Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg. If jac is a Boolean and is True, fun is assumed to return the gradient along with the objective function. If False, the gradient will be estimated numerically. jac can also be a callable returning the gradient of the objective. In this case, it must accept the same arguments as fun.

It means that if your backprop function returns (cost, grad), you can set jac=True.

That is how it is implemented in http://nbviewer.jupyter.org/github/jdwittenauer/ipython-notebooks/blob/master/notebooks/ml/ML-Exercise4.ipynb,

but I chose to keep the cost and gradient functions separate.
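A sketch of what nn.nn_training might look like under these assumptions (random initialization in a small interval around zero, cost and gradient passed as separate callables, solver and options chosen for illustration):

from scipy.optimize import minimize

def nn_training_sketch(X, y, cost_fn, grad_fn):
    # random init breaks symmetry: if all weights started equal,
    # every hidden unit would keep computing the same function
    init_theta = np.random.uniform(-0.12, 0.12, 10285)

    return minimize(fun=cost_fn,
                    x0=init_theta,
                    args=(X, y),
                    method='TNC',            # illustrative choice of solver
                    jac=grad_fn,             # gradient as a separate callable
                    options={'maxiter': 400})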


In [11]:
res = nn.nn_training(X, y)
res


Out[11]:
     fun: 0.315404193057851
     jac: array([ -9.87946759e-05,  -2.58841584e-08,   5.74177576e-08, ...,
         5.84882083e-05,   8.65063516e-05,   4.38541405e-05])
 message: 'Max. number of function evaluations reached'
    nfev: 400
     nit: 26
  status: 3
 success: False
       x: array([  0.00000000e+00,  -1.29420792e-04,   2.87088788e-04, ...,
         1.79382032e-01,  -1.35645264e+00,  -1.62269753e+00])

show accuracy


In [12]:
_, y_answer = nn.load_data('ex4data1.mat')
y_answer[:20]


Out[12]:
array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
       10, 10, 10], dtype=uint8)

In [13]:
final_theta = res.x

In [16]:
nn.show_accuracy(final_theta, X, y_answer)


             precision    recall  f1-score   support

          1       0.99      0.98      0.98       500
          2       0.96      0.98      0.97       500
          3       0.97      0.93      0.95       500
          4       1.00      0.87      0.93       500
          5       1.00      0.58      0.73       500
          6       0.91      0.99      0.95       500
          7       0.99      0.96      0.97       500
          8       0.87      1.00      0.93       500
          9       0.97      0.95      0.96       500
         10       0.71      1.00      0.83       500

avg / total       0.94      0.92      0.92      5000
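show_accuracy presumably runs a forward pass with the trained weights, takes the argmax of the output layer (plus 1, since the labels are 1-based), and prints sklearn's classification_report. A sketch using the helpers above:

from sklearn.metrics import classification_report

def show_accuracy_sketch(theta, X, y_answer):
    t1, t2 = deserialize_sketch(theta)
    a2 = np.insert(sigmoid(X @ t1.T), 0, 1, axis=1)
    a3 = sigmoid(a2 @ t2.T)
    y_pred = np.argmax(a3, axis=1) + 1   # classes 1..10, where 10 stands for digit 0
    print(classification_report(y_answer.ravel(), y_pred))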

show hidden layer


In [15]:
nn.plot_hidden_layer(final_theta)
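The hidden-layer plot drops the bias column of the first weight matrix and reshapes each of its 25 rows back into a 20x20 image. A sketch, assuming matplotlib and the helpers above:

import matplotlib.pyplot as plt

def plot_hidden_layer_sketch(theta):
    t1, _ = deserialize_sketch(theta)
    hidden = t1[:, 1:]                               # drop bias column: (25, 400)
    fig, axes = plt.subplots(5, 5, sharex=True, sharey=True, figsize=(5, 5))
    for i, ax in enumerate(axes.flat):
        ax.matshow(hidden[i].reshape(20, 20), cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()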



In [ ]: